home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Personal Computer World 2009 February
/
PCWFEB09.iso
/
Software
/
Linux
/
Kubuntu 8.10
/
kubuntu-8.10-desktop-i386.iso
/
casper
/
filesystem.squashfs
/
usr
/
bin
/
bogoupgrade
< prev
next >
Wrap
Text File
|
2008-06-18
|
6KB
|
250 lines
#! /usr/bin/perl -w
=pod
=head1 NAME
bogoupgrade -- upgrade a bogofilter database to current version.
=cut
# bogofilter-0.3 through bogofilter-0.6.3
#
# HEADER "# bogofilter wordlist (format version A): %lu"
#
# bogofilter-0.7.x
#
# HEADER "# bogofilter email-count (format version B): %lu"
#
# bogofilter-0.8 to bogofilter-0.13.x
#
# BerkeleyDB with double wordlists
# spamlist.db and goodlist.db
# bogofilter-0.14 and later
#
# BerkeleyDB with single wordlist
# wordlist.db
require 5.005_03;
use strict;
use filetest 'access';
use Pod::Usage;
sub convert_double_to_single();
sub convert_format_A();
sub convert_format_B();
sub db_verify($);
# Version of this script.
my $VERSION = '0.5.0';

# Settings filled in from the command line and read by the conversion subs:
#   $in, $out         -i / -o: input text file and output database file
#   $dir              -d: directory holding the databases to convert
#   $msg_count_token  name of the record that carries the message count
my ($in, $out, $dir, $msg_count_token);

# "-y <value>" fragment spliced into bogoutil command lines.  It starts out as
# the empty string rather than undef so that building those command lines
# without -y does not trigger "uninitialized value" warnings under -w.
my $yday = '';

# Program run for every database operation; -b overrides it.
my $bogoutil = 'bogoutil';

for (my $i = 0; $i < @ARGV; $i++) {
    my $arg = $ARGV[$i];

    if ($arg eq '-d' or $arg eq '-i' or $arg eq '-o'
        or $arg eq '-b' or $arg eq '-y') {
        # All of these consume the following word as their value.  Refuse to
        # run off the end of @ARGV instead of silently storing undef.
        if ($i + 1 >= @ARGV) {
            pod2usage(-message    => "Option $arg requires an argument.",
                      -verbose    => 0,
                      -exitstatus => 1);
        }
        my $value = $ARGV[++$i];

        if    ($arg eq '-d') { $dir      = $value; }
        elsif ($arg eq '-i') { $in       = $value; }
        elsif ($arg eq '-o') { $out      = $value; }
        elsif ($arg eq '-b') { $bogoutil = $value; }
        else                 { $yday     = "-y $value"; }
    } elsif ($arg eq '-h' or $arg eq '--help') {
        pod2usage(-verbose => 1, -exitstatus => 0);
    } else {
        # Anything else is an unknown option.
        pod2usage(-verbose => 0, -exitstatus => 1);
    }
}
# Extension of the database files; the DB_EXT environment variable overrides
# the default of 'db'.
my $db_ext = $ENV{DB_EXT} || 'db';

if ($dir) {
    # Directory mode: merge the spamlist/goodlist pair into one wordlist.
    if ($in || $out) {
        pod2usage("Cannot use -d at the same time as -i or -o!");
    }
    convert_double_to_single();
} else {
    # File mode: the first line of the text file identifies its format.
    pod2usage("Missing input filename.\nAborting") unless $in;
    pod2usage("Missing output filename.\nAborting") unless $out;

    # Assign the file-level variable.  The conversion subs are compiled at
    # file scope and only ever see that one; declaring a new "my" variable in
    # this block would leave theirs undefined and the message-count record
    # would be written without a token name.
    $msg_count_token = '.MSG_COUNT';

    open(F, "< $in") or die "Cannot open input file [$in]. $!.\nAborting";
    my $sig = <F>;
    # An empty file has no first line; treat that as an empty signature so it
    # is reported as unrecognized below without undef warnings.
    $sig = '' unless defined $sig;
    chomp($sig);

    # The converters take the captured message count from $1, so each one is
    # called immediately after its match, before any other regex can run.
    if ($sig =~ m/^\# bogofilter wordlist \(format version A\):\s(\d+)$/) {
        convert_format_A();
    }
    elsif ($sig =~ m/^\# bogofilter email-count \(format version B\):\s(\d+)/) {
        convert_format_B();
    }
    else {
        # Replace every run of characters outside \040-\177 with a single
        # underscore before echoing the line back to the user.
        $sig =~ y/[\040-\177]/_/cs;
        warn "Cannot recognize signature [$sig].\n";
        exit(2);
    }
}
# Make sure a database file can be used as conversion input.
#
# args: path of the database file
# dies: when the file is not readable, or when running
#       "$bogoutil --db-verify <file>" yields a non-zero status.
sub db_verify($) {
    my $db_file = $_[0];

    -r $db_file
        or die "Database $db_file is not readable: $!\nAborting";

    # List form of system(): the file name is handed over as one argument
    # and never passes through a shell.
    my $status = system($bogoutil, '--db-verify', $db_file);
    if ($status) {
        die "Database $db_file is corrupt.\nAborting";
    }
}
# Convert a "format version A" text wordlist.
#
# Expects the filehandle F to be open on the input file and $1 to hold the
# message count captured by the caller's signature match.  The rest of F is
# piped to "$bogoutil $yday -l $out", followed by one extra record made of
# $msg_count_token and the message count.
sub convert_format_A() {
    # Copy $1 first; it is only valid until the next successful match.
    my $total = $1;

    my $loader = "$bogoutil $yday -l $out";
    open(OUT, "| $loader")
        or die "Cannot run command \"$loader\": $!\nAborting";

    # Pass every remaining input line through unchanged.
    print OUT $_ while <F>;

    print OUT "$msg_count_token $total\n";

    # Closing a pipe waits for the child, so this is where a failing loader
    # is noticed.
    close(OUT)
        or die "Error executing command \"$loader\": $!\nAborting";
    close(F);
}
# Convert a "format version B" wordlist.
#
# In this format the text file named by -i only carries the message count;
# the word records live in a database file whose name is derived from the
# input name by replacing a trailing "count" with $db_ext.  That database is
# verified, dumped with "bogoutil -d" and re-loaded into $out with
# "bogoutil -l", with a single message-count record appended.
#
# Expects the filehandle F to be open on the text file and $1 to hold the
# count captured by the caller's signature match.
# Exits with status 1 when the database file does not exist; dies when the
# database fails verification or either bogoutil command fails.
sub convert_format_B() {
    # Must come first: $1 is only valid until the next successful match.
    my $msg_count = $1;

    # Without -y the option string may be undefined; use '' so that building
    # the command lines does not warn under -w.
    my $yday_opt = defined $yday ? $yday : '';

    my $in_db = $in;
    $in_db =~ s/count$/$db_ext/;

    unless (-f $in_db) {
        warn("Cannot find database file [$in_db]\n corresponding to input file [$in]\n");
        # This is a failure, so do not report success to the caller.
        exit(1);
    }

    # The text file has served its purpose (the count was in its first line).
    close(F);

    # Verify the input before the loader is started, so that a bad database
    # aborts the run before anything has been piped towards $out.
    db_verify($in_db);

    my $dump_cmd = "$bogoutil $yday_opt -d $in_db";
    open(F, "$dump_cmd |")
        or die "Cannot run command \"$dump_cmd\": $!\nAborting";

    my $load_cmd = "$bogoutil $yday_opt -l $out";
    open(OUT, "| $load_cmd")
        or die "Cannot run command \"$load_cmd\": $!\nAborting";

    while (<F>) {
        if (m/^\.count\s+(\d+)$/) {
            # A count stored inside the database wins over the text file.
            warn("Found a message count of [$1] in db.\nThrowing away text file count of [$msg_count]\n");
            $msg_count = $1;
            next;
        }
        elsif (/^\Q$msg_count_token\E\s(\d+)$/) {
            # \Q...\E: the token is a literal string, not a pattern.
            warn("This database appears to have been upgraded already.\nBut there's no harm in doing it again.\n");
            $msg_count = $1;
            next;
        }
        print OUT $_;
    }

    # Both kinds of count record were dropped above; write exactly one back.
    print OUT "$msg_count_token $msg_count\n";

    # Closing a pipe waits for the child and reports its failure, so check
    # both ends just as convert_format_A does for its loader.
    close(F)
        or die "Error executing command \"$dump_cmd\": $!\nAborting";
    close(OUT)
        or die "Error executing command \"$load_cmd\": $!\nAborting";
}
# Dump one database with "bogoutil -d" and reformat its records.
#
# args: filename format
#   filename - database file; checked with db_verify() before it is dumped
#   format   - sprintf() template applied to the first three
#              whitespace-separated fields of every dumped line
# returns: the formatted records as a list, in dump order
# dies: when not called in list context, when verification fails, or when
#       the dump command cannot be started or reports an error on close.
sub cvt2to1_get($$) {
    my ($filename, $format) = @_;

    die "need array context in cvt2to1_get.\nAborting" unless wantarray;

    db_verify($filename);

    my $cmd = "$bogoutil -d $filename";
    open(I, "$cmd |") or die "cannot run \"$cmd\": $!\nAborting";

    my @records;
    while (<I>) {
        chomp;
        my @fields = split;

        # A blank line carries no token; formatting it would only produce a
        # record with an empty word.
        next unless @fields;

        # Fields missing from a short line are passed as 0.  The %d
        # conversions used by the callers in this file rendered undef as 0
        # anyway, but with a warning under -w.
        push @records,
            sprintf($format, map { defined $_ ? $_ : 0 } @fields[0 .. 2]);
    }
    close I or die "error running \"$cmd\": $!\nAborting";

    return @records;
}
# Merge the two-database layout (spamlist + goodlist) found in $dir into a
# single wordlist database in the same directory.
#
# Records from spamlist are rewritten as "word count 0 date" and records from
# goodlist as "word 0 count date"; the combined list is sorted and piped to
# "$bogoutil -l".  Dies if the wordlist file already exists or if any
# bogoutil invocation fails.
sub convert_double_to_single() {
    my ($merged_db, $spam_db, $good_db) =
        map { "$dir/$_.$db_ext" } qw(wordlist spamlist goodlist);

    # Never overwrite an existing single wordlist.
    die "$merged_db already exists.\nAborting" if -e $merged_db;

    # Spam records first, then good records; each call runs in list context.
    my @records = (
        cvt2to1_get($spam_db, "%s %d 0 %d"),
        cvt2to1_get($good_db, "%s 0 %d %d"),
    );

    my $loader = "$bogoutil -l $merged_db";
    open(O, "| $loader") or die "cannot run \"$loader\": $!\nAborting";

    my $payload = join("\n", sort @records) . "\n";
    print O $payload;

    close O or die "error executing \"$loader\": $!\nAborting";
}
# Reached only when the top-level conversion code above finished without
# dying or exiting early; report success.
exit(0);
__END__
=pod
=head1 SYNOPSIS
bogoupgrade [options] -d <bogofilter directory>
bogoupgrade [options] -i <input text file> -o <output db file>
bogoupgrade {-h|--help}
Options:
-b <path to bogoutil>
=head1 DESCRIPTION
B<bogoupgrade> updates bogofilter databases from older formats to the current
format.
=head1 OPTIONS
=over 8
=item B<-d <directory>>
Name of directory containing database files. Old files will be read and new
files will be written.
=item B<-i <input file>>
Text file containing message count, and possibly wordlist data records. If the
file only contains a message count but no word list records, there must be a
database file, in the same directory as the text file, which contains the word
list data.
=item B<-o <output file>>
Output database file. Use the appropriate file extension for your version of
bogofilter, i.e. '.db' for Berkeley DB and SQLite3, or '.qdbm' for QDBM.
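=item B<-b <path to bogoutil program>>
Defaults to 'bogoutil', in the hopes that your shell will find it.
=item B<-y <value>>
Passed unchanged to bogoutil as its -y option when converting with -i and -o.
It is not used with -d. See the bogoutil documentation for the accepted values.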
=item B<-h> | B<--help>
Print the help - you are currently reading it.
=back
=head1 AUTHORS
Gyepi Sam <gyepi@praxis-sw.com>
David Relson <relson@osagesoftware.com>
Matthias Andree <matthias.andree@gmx.de>
=cut
# vim: set filetype=perl ai: